
/* 

Paper: Gentrification and pioneer businesses 
Authors: Behrens, Boulam, Martin, Mayneris 

Name dofile: figure2.do  
Version: 10 nov. 2021 

Output: produces the 8 panels of figure 2 describing selected characteristics of workers in pioneer sectors in New York, 2000–2010

- pioneer_high_ny_mean_age.pdf
- pioneer_high_ny_college.pdf
- pioneer_high_ny_realwage.pdf
- pioneer_high_ny_powercouple.pdf
- pioneer_high_ny_childrenshare.pdf
- pioneer_high_ny_marital.pdf
- pioneer_high_ny_share_home.pdf
- pioneer_high_ny_share_close.pdf

Inputs:

 - pioneers_final_negbin.dta [produced in identify_pioneers_negbin.do] [publicly available]
 - stats_by_naics.dta [produced in .do] [publicly available]

*/ 

// Base names of the eight sector characteristics plotted in figure 2.
global list_variables share_close share_home mean_age marital realwage college childrenshare powercouple

// Same names carrying the "_ny" suffix (the names kept from stats_by_naics.dta).
// Built from the base list so that the two lists always stay in the same order.
global list_variables_ny
foreach v of global list_variables {
    global list_variables_ny $list_variables_ny `v'_ny
}


// Dataset with info on sector characteristics based on IPUMS for NYC.
// Info is not always available at the 5-digit level of NAICS, so the
// characteristics are looked up at the 5-digit level first and then, for
// values that are still missing, at the 4-, 3- and 2-digit levels in turn.

use "stats_by_naics.dta", clear
tostring naics, replace

// Build one lookup file per aggregation level, holding only the rows of that
// level and keyed on naics`n'. Keeping the levels in separate files means the
// key never contains empty strings, and lets the merges below be m:1 (which
// stops with an error if a code is duplicated within a level) instead of m:m
// (which silently pairs rows by their order within each key group).
// Tempfiles are used so that nothing is written to the working directory.
forvalues n = 2/5 {
    preserve
    keep if digit == `n'
    rename naics naics`n'
    keep naics`n' $list_variables_ny
    tempfile stats`n'
    save "`stats`n''"
    restore
}


// Merge list of sectors (pioneers and non-pioneers) with sector characteristics.
use "pioneers_final_negbin.dta", clear
forvalues n = 2/5 {
    gen naics`n' = substr(naics, 1, `n')
}

// 5-digit level: take the characteristics as they are; sectors without a
// match are kept with missing characteristics.
merge m:1 naics5 using "`stats5'", keep(master match) nogenerate

// Move the merged values into working variables named <characteristic>_ so
// that the "_ny" names are free for the next merges.
// NOTE(review): assumes pioneers_final_negbin.dta does not already contain
// variables named share_close, share_home, ... -- confirm against that file.
foreach v of global list_variables {
    rename `v'_ny `v'_
}

// Coarser levels: only fill in values that are still missing.
foreach n of numlist 4 3 2 {
    merge m:1 naics`n' using "`stats`n''", keep(master match)
    foreach v of global list_variables {
        replace `v'_ = `v'_ny if _merge == 3 & missing(`v'_)
    }
    drop _merge $list_variables_ny
}

// Restore the plain characteristic names used for the figures.
foreach v of global list_variables {
    rename `v'_ `v'
}

// Report how many sectors still have no value after all four levels.
count if missing(share_close)

// Panels of figure 2: for each sector characteristic, one box plot comparing
// pioneer and non-pioneer sectors, exported as pioneer_high_ny_<name>.pdf.

// Readable group labels for the pioneer indicator.
label define pioneer_high 0 "Non-pioneers" 1 "Pioneers"
label values pioneer_high pioneer_high

set scheme s1mono
foreach charac of global list_variables {
    // nooutsides: outside values are not drawn
    graph box `charac', over(pioneer_high) nooutsides
    graph export pioneer_high_ny_`charac'.pdf, replace
}



